function [mzaxis,interpdata,coords]=open_read_interp(directory,dataname,lowmzlimit,highmzlimit,spacing)
%OPEN_READ_INTERP Read a folder of mzXML spectra onto one shared m/z axis.
%
%This function opens every .mzXML file within a directory, extracts the
%m/z data (axis and intensity) of each file, interpolates every spectrum
%onto a common m/z axis, and saves the results to a .mat file.
%
%The files are read through their full path (directory + file name), so
%the MATLAB current folder does not have to be the data folder.
%
%This command can take a significant amount of time to execute.
%
%INPUTS
%directory - Folder that contains the data files, given as text, for
%example 'C:\Users\GroupMember\Desktop\test'.  Every file matching
%*.mzXML in this folder is processed.  The file names must follow the
%pattern X###Y###.mzXML because the pixel coordinates are parsed from
%the names.
%
%dataname - Name of the .mat file the results are saved to, given as
%text, for example 'htp.mat' or 'mystuff.mat'.  The file receives the
%variables mzaxis, interpdata, coords, dataname and spacing.
%
%lowmzlimit, highmzlimit - Lower and upper bound of the m/z axis used for
%interpolation.  If BOTH are zero, the axis runs from the smallest to the
%largest m/z value found in any of the files.  Otherwise the axis is
%exactly lowmzlimit:spacing:highmzlimit (so a single zero is used
%literally as a limit).
%
%spacing - Step of the m/z axis in units of m/z.  For protein work, 1 is
%sufficient.
%
%OUTPUTS
%mzaxis - Row vector (double) holding the common m/z axis, with a step of
%spacing.
%
%interpdata - Matrix (double) of interpolated intensities.  Each row is
%the spectrum of one file/pixel and each column corresponds to one entry
%of mzaxis.  Axis points that lie outside the measured range of a
%spectrum are set to 0.
%
%coords - Matrix of pixel coordinates, one row per file, paired with the
%rows of interpdata.  The first column holds the x coordinates and the
%second column the y coordinates, both shifted so the smallest value is 1.

%Find all .mzXML files in the requested directory.
files = dir(fullfile(directory, '*.mzXML'));
numFiles = numel(files);
if numFiles == 0
    error('open_read_interp:noFiles', ...
        'No .mzXML files were found in "%s".', directory);
end

%Extract the XY coordinates from the file names (format X###Y###.mzXML).
%This is done before the slow file reading so that a badly named file is
%reported immediately instead of after all spectra have been loaded.
xpos = zeros(1, numFiles);
ypos = zeros(1, numFiles);
for k = 1:numFiles
    name = files(k).name;
    [beforeY, fromY] = strtok(name, 'Y');      %'X###' and 'Y###.mzXML'
    xText = strtok(beforeY, 'X');              %digits after the X
    yText = strtok(strtok(fromY, 'Y'), '.');   %digits between Y and '.'
    %str2double never evaluates its input as code (str2num does) and
    %returns NaN for text that is not a number.
    xpos(k) = str2double(xText);
    ypos(k) = str2double(yText);
    if isnan(xpos(k)) || isnan(ypos(k))
        error('open_read_interp:badFileName', ...
            'Cannot read X/Y coordinates from file name "%s"; expected X###Y###.mzXML.', ...
            name);
    end
end
%Shift the coordinates so that the smallest x and y are both 1.
coords = [xpos - min(xpos) + 1; ypos - min(ypos) + 1]';

%Load the m/z values and intensities of every file.  The peak list is
%stored interleaved (m/z, intensity, m/z, intensity, ...), so the odd
%entries are the m/z values and the even entries the intensities.
%NOTE: as in the original code, each file is assumed to hold one scan.
mzValues = cell(numFiles, 1);
intensities = cell(numFiles, 1);
lowest = zeros(1, numFiles);
highest = zeros(1, numFiles);
for k = 1:numFiles
    scanData = mzxmlread(fullfile(directory, files(k).name));
    rawPeaks = scanData.scan.peaks.mz;
    if isempty(rawPeaks)
        error('open_read_interp:emptySpectrum', ...
            'File "%s" does not contain any peaks.', files(k).name);
    end
    mzValues{k} = rawPeaks(1:2:end);
    intensities{k} = rawPeaks(2:2:end);
    lowest(k) = min(mzValues{k});
    highest(k) = max(mzValues{k});
end

%Set the m/z axis over which the data is interpolated.  With both limits
%equal to zero the axis spans the lowest to the highest m/z value recorded
%in any file; otherwise the limits given by the caller are used as is.
if (lowmzlimit == 0) && (highmzlimit == 0)
    totalaxis = min(lowest):spacing:max(highest);
else
    totalaxis = lowmzlimit:spacing:highmzlimit;
end
mzaxis = double(totalaxis);

%Interpolate every spectrum onto the common axis, one vectorized interp1
%call per file.
interpdata = zeros(numFiles, numel(totalaxis));
for k = 1:numFiles
    interpdata(k, :) = interp1(mzValues{k}, intensities{k}, totalaxis);
    %Release the raw spectrum as soon as it is no longer needed.
    mzValues{k} = [];
    intensities{k} = [];
    disp(['Files Left ', num2str(numFiles - k)])
end

%interp1 returns NaN for axis points outside the measured range of a
%spectrum; replace those with zero.
interpdata(isnan(interpdata)) = 0;
interpdata = double(interpdata);

%Save the results.  The variable list matches what the workspace save of
%the original implementation contained after its clean-up.
save(dataname, 'mzaxis', 'interpdata', 'coords', 'dataname', 'spacing')